Load Packages

library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidygeocoder)
library(MapGAM)
## Loading required package: sp
## Loading required package: gam
## Loading required package: splines
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## Loaded gam 1.22-5
## 
## Loading required package: survival
library(tidycensus)
library(flextable)
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(tmap)


Geocoding

Question 1. Read in In N Out Burger

glimpse(innout)

download.file("https://raw.githubusercontent.com/pjames-ucdavis/SPH215/refs/heads/main/innout_final.csv", "innout_final.csv", mode = "wb")
in_n_out <- read_csv("innout_final.csv")
## Rows: 30 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): store address
## dbl (1): Store Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


Question 2. Glimpse at the data. Looks like store address is the variable to geocode!

glimpse(in_n_out)
## Rows: 30
## Columns: 2
## $ `Store Number`  <dbl> 131, 327, 276, 217, 225, 247, 320, 319, 197, 207, 143,…
## $ `store address` <chr> "3501 Truxel Road Sacramento CA 95834", "8200 Delta Sh…


Question 3. Geocode the data using “arcgis” geocoder.

innout_geo      <- geocode(in_n_out, address = "store address",
                          method = "arcgis")
## Passing 30 addresses to the ArcGIS single address geocoder
## Query completed in: 16.7 seconds


Question 4. Looks like there’s no missingness! No NAs!

summary(innout_geo$lat)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35.99   37.99   38.39   38.12   38.59   38.76
summary(innout_geo$long)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -122.7  -122.0  -121.5  -121.5  -121.3  -119.7


Question 5. We use st_as_sf to make the dataset spatial and we use the crs of 4326

innout_pts <- st_as_sf(innout_geo, coords=c("long","lat"), crs=4326)


Question 6. Map the addreses in red below!

tmap_mode("view")
## ℹ tmap mode set to "view".
innout_map = tm_shape(innout_pts) + tm_dots(col = "red", size = 0.3, alpha = 0.5)
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_dots()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').[v3->v4] `tm_dots()`: use `fill_alpha` instead of `alpha`.
innout_map

Question 7. Map the addreses in blue below!

tmap_mode("view")
## ℹ tmap mode set to "view".
innout_map_blue = tm_shape(innout_pts) + tm_dots(col = "blue", size = 0.3, alpha = 0.5)
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_dots()`: use `fill_alpha` instead of `alpha`.
innout_map_blue

Map Point Health Data

Question 8. Import CAdata dataset

#Load CAdata dataset from MapGAM package
data(CAdata)
ca_pts <- CAdata
summary(ca_pts)
##       time               event              X                 Y          
##  Min.   : 0.004068   Min.   :0.0000   Min.   :1811375   Min.   :-241999  
##  1st Qu.: 1.931247   1st Qu.:0.0000   1st Qu.:2018363   1st Qu.: -94700  
##  Median : 4.749980   Median :1.0000   Median :2325084   Median : -60386  
##  Mean   : 6.496130   Mean   :0.6062   Mean   :2230219   Mean   :  87591  
##  3rd Qu.: 9.609031   3rd Qu.:1.0000   3rd Qu.:2380230   3rd Qu.: 318280  
##  Max.   :24.997764   Max.   :1.0000   Max.   :2705633   Max.   : 770658  
##       AGE         INS      
##  Min.   :25.00   Mcd: 431  
##  1st Qu.:53.00   Mcr:1419  
##  Median :62.00   Mng:2304  
##  Mean   :61.28   Oth: 526  
##  3rd Qu.:71.00   Uni: 168  
##  Max.   :80.00   Unk: 152
ca_pts <- st_as_sf(CAdata, coords=c("X","Y"))


Question 9. Add CRS

#Load the projection into an object called ca_proj
ca_proj <- "+proj=lcc +lat_1=40 +lat_2=41.66666666666666 
             +lat_0=39.33333333333334 +lon_0=-122 +x_0=2000000 
             +y_0=500000.0000000002 +ellps=GRS80 
             +datum=NAD83 +units=m +no_defs"

#Set CRS
ca_pts_crs <- st_set_crs(ca_pts, ca_proj)


Question 10. Create a map of the CAdata dataset with points color coded by quintiles of age.

cancer_map_age = tm_shape(ca_pts_crs) + tm_dots(col = "AGE", size = 0.3, style = "quantile")
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_dots()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style' to 'tm_scale_intervals(<HERE>)'
cancer_map_age
## Registered S3 method overwritten by 'jsonify':
##   method     from    
##   print.json jsonlite


Question 11. Map by insurance status

cancer_map_ins = tm_shape(ca_pts_crs) + tm_dots(col = "INS", size = 0.3, style = "cat")
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_dots()`: instead of `style = "cat"`, use fill.scale =
## `tm_scale_categorical()`.
cancer_map_ins


Census Data

Question 12. Use the tidycensus package to download American Community Survey Five year estimates from 2018-2022 in California at the Census tract level for the following variable

ca <- get_acs(geography = "tract", 
              year = 2022,
              variables = c(medinc = "B19013_001"), 
              state = "CA",
              survey = "acs5",
              output = "wide")
## Getting data from the 2018-2022 5-year ACS


Question 13. Take a glimpse at your data.

glimpse(ca)
## Rows: 9,129
## Columns: 4
## $ GEOID   <chr> "06001400100", "06001400200", "06001400300", "06001400400", "0…
## $ NAME    <chr> "Census Tract 4001; Alameda County; California", "Census Tract…
## $ medincE <dbl> 234236, 225500, 164000, 158836, 95078, 155893, 110556, 97132, …
## $ medincM <dbl> 42845, 29169, 44675, 9038, 34125, 33824, 12318, 37490, 31849, …


Question 14. Give us the mean median household income across tracts in California.

ca %>%
  summarize(Mean = mean(medincE))
## # A tibble: 1 × 1
##    Mean
##   <dbl>
## 1    NA


Question 15. Make a table showing the mean, median, and standard deviation of median household income across tract in California, and make it look fancy!

summarytable <- ca %>%
  summarize(Mean = mean(medincE),
            Median = median(medincE),
            SD = sd(medincE)
            )
my_table <- flextable(summarytable)
my_table

Mean

Median

SD


Question 16. Create a histogram of median household income across tracts in California. Make sure it has a label.

ca %>%
  ggplot() + 
  geom_histogram(mapping = aes(x=medincE)) +
  xlab("Median household income")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 141 rows containing non-finite outside the scale range
## (`stat_bin()`).


Question 17. OK now give me a boxplot! And I want a nice label for this too!

ca %>%
  ggplot() +
    geom_boxplot(mapping = aes(y = medincE)) +
    ylab("Median household income")
LS0tCnRpdGxlOiAiTGFiIDEgQXNzaWdubWVudCIKYXV0aG9yOiAiUGV0ZXIgSmFtZXMiCmRhdGU6ICIyMDI1LTA0LTE3IgpvdXRwdXQ6IGh0bWxfZG9jdW1lbnQKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCkxvYWQgUGFja2FnZXMKYGBge3J9CmxpYnJhcnkoc2YpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHlnZW9jb2RlcikKbGlicmFyeShNYXBHQU0pCmxpYnJhcnkodGlkeWNlbnN1cykKbGlicmFyeShmbGV4dGFibGUpCmxpYnJhcnkodG1hcCkKYGBgCgpcCgojIEdlb2NvZGluZwoKUXVlc3Rpb24gMS4gUmVhZCBpbiBJbiBOIE91dCBCdXJnZXIKCmdsaW1wc2UoaW5ub3V0KQpgYGB7cn0KZG93bmxvYWQuZmlsZSgiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3BqYW1lcy11Y2RhdmlzL1NQSDIxNS9yZWZzL2hlYWRzL21haW4vaW5ub3V0X2ZpbmFsLmNzdiIsICJpbm5vdXRfZmluYWwuY3N2IiwgbW9kZSA9ICJ3YiIpCmluX25fb3V0IDwtIHJlYWRfY3N2KCJpbm5vdXRfZmluYWwuY3N2IikKYGBgCgpcCgpRdWVzdGlvbiAyLiBHbGltcHNlIGF0IHRoZSBkYXRhLiBMb29rcyBsaWtlICpzdG9yZSBhZGRyZXNzKiBpcyB0aGUgdmFyaWFibGUgdG8gZ2VvY29kZSEKYGBge3J9CmdsaW1wc2UoaW5fbl9vdXQpCmBgYAoKXAoKUXVlc3Rpb24gMy4gR2VvY29kZSB0aGUgZGF0YSB1c2luZyAiYXJjZ2lzIiBnZW9jb2Rlci4KYGBge3J9Cmlubm91dF9nZW8gICAgICA8LSBnZW9jb2RlKGluX25fb3V0LCBhZGRyZXNzID0gInN0b3JlIGFkZHJlc3MiLAogICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJhcmNnaXMiKQpgYGAKClwKClF1ZXN0aW9uIDQuIExvb2tzIGxpa2UgdGhlcmUncyBubyBtaXNzaW5nbmVzcyEgTm8gTkFzIQpgYGB7cn0Kc3VtbWFyeShpbm5vdXRfZ2VvJGxhdCkKc3VtbWFyeShpbm5vdXRfZ2VvJGxvbmcpCmBgYAoKXAoKUXVlc3Rpb24gNS4gV2UgdXNlIHN0X2FzX3NmIHRvIG1ha2UgdGhlIGRhdGFzZXQgc3BhdGlhbCBhbmQgd2UgdXNlIHRoZSBjcnMgb2YgNDMyNgpgYGB7cn0KaW5ub3V0X3B0cyA8LSBzdF9hc19zZihpbm5vdXRfZ2VvLCBjb29yZHM9YygibG9uZyIsImxhdCIpLCBjcnM9NDMyNikKYGBgCgpcCgpRdWVzdGlvbiA2LiBNYXAgdGhlIGFkZHJlc2VzIGluIHJlZCBiZWxvdyEKYGBge3J9CnRtYXBfbW9kZSgidmlldyIpCmlubm91dF9tYXAgPSB0bV9zaGFwZShpbm5vdXRfcHRzKSArIHRtX2RvdHMoY29sID0gInJlZCIsIHNpemUgPSAwLjMsIGFscGhhID0gMC41KQppbm5vdXRfbWFwCmBgYAoKClF1ZXN0aW9uIDcuIE1hcCB0aGUgYWRkcmVzZXMgaW4gYmx1ZSBiZWxvdyEKYGBge3J9CnRtYXBfbW9kZSgidmlldyIpCmlubm91dF9tYXBfYmx1ZSA9IHRtX3NoYXBlKGlubm91dF9wdHMpICsgdG1fZG90cyhjb2wgPSAiYmx1ZSIsIHNpemUgPSAwLjMsIGFscGhhID0gMC41KQppbm5vdXRfbWFwX2JsdWUKYGBgCgojIE1hcCBQb2ludCBIZWFsdGggRGF0YQoKUXVlc3Rpb24gOC4gSW1wb3J0IENBZGF0YSBkYXRhc2V0CmBgYHtyfQojTG9hZCBDQWRhdGEgZGF0YXNldCBmcm9tIE1hcEdBTSBwYWNrYWdlCmRhdGEoQ0FkYXRhKQpjYV9wdHMgPC0gQ0FkYXRhCnN1bW1hcnkoY2FfcHRzKQpjYV9wdHMgPC0gc3RfYXNfc2YoQ0FkYXRhLCBjb29yZHM9YygiWCIsIlkiKSkKCmBgYAoKXAoKUXVlc3Rpb24gOS4gQWRkIENSUwpgYGB7cn0KCiNMb2FkIHRoZSBwcm9qZWN0aW9uIGludG8gYW4gb2JqZWN0IGNhbGxlZCBjYV9wcm9qCmNhX3Byb2ogPC0gIitwcm9qPWxjYyArbGF0XzE9NDAgK2xhdF8yPTQxLjY2NjY2NjY2NjY2NjY2IAogICAgICAgICAgICAgK2xhdF8wPTM5LjMzMzMzMzMzMzMzMzM0ICtsb25fMD0tMTIyICt4XzA9MjAwMDAwMCAKICAgICAgICAgICAgICt5XzA9NTAwMDAwLjAwMDAwMDAwMDIgK2VsbHBzPUdSUzgwIAogICAgICAgICAgICAgK2RhdHVtPU5BRDgzICt1bml0cz1tICtub19kZWZzIgoKI1NldCBDUlMKY2FfcHRzX2NycyA8LSBzdF9zZXRfY3JzKGNhX3B0cywgY2FfcHJvaikKYGBgCgpcCgpRdWVzdGlvbiAxMC4gQ3JlYXRlIGEgbWFwIG9mIHRoZSBDQWRhdGEgZGF0YXNldCB3aXRoIHBvaW50cyBjb2xvciBjb2RlZCBieSBxdWludGlsZXMgb2YgYWdlLgpgYGB7cn0KY2FuY2VyX21hcF9hZ2UgPSB0bV9zaGFwZShjYV9wdHNfY3JzKSArIHRtX2RvdHMoY29sID0gIkFHRSIsIHNpemUgPSAwLjMsIHN0eWxlID0gInF1YW50aWxlIikKY2FuY2VyX21hcF9hZ2UKYGBgCgpcCgpRdWVzdGlvbiAxMS4gTWFwIGJ5IGluc3VyYW5jZSBzdGF0dXMKYGBge3J9CmNhbmNlcl9tYXBfaW5zID0gdG1fc2hhcGUoY2FfcHRzX2NycykgKyB0bV9kb3RzKGNvbCA9ICJJTlMiLCBzaXplID0gMC4zLCBzdHlsZSA9ICJjYXQiKQpjYW5jZXJfbWFwX2lucwpgYGAKClwKCiMgQ2Vuc3VzIERhdGEKClF1ZXN0aW9uIDEyLiBVc2UgdGhlIHRpZHljZW5zdXMgcGFja2FnZSB0byBkb3dubG9hZCBBbWVyaWNhbiBDb21tdW5pdHkgU3VydmV5IEZpdmUgeWVhciBlc3RpbWF0ZXMgZnJvbSAyMDE4LTIwMjIgaW4gQ2FsaWZvcm5pYSBhdCB0aGUgQ2Vuc3VzIHRyYWN0IGxldmVsIGZvciB0aGUgZm9sbG93aW5nIHZhcmlhYmxlCmBgYHtyfQpjYSA8LSBnZXRfYWNzKGdlb2dyYXBoeSA9ICJ0cmFjdCIsIAogICAgICAgICAgICAgIHllYXIgPSAyMDIyLAogICAgICAgICAgICAgIHZhcmlhYmxlcyA9IGMobWVkaW5jID0gIkIxOTAxM18wMDEiKSwgCiAgICAgICAgICAgICAgc3RhdGUgPSAiQ0EiLAogICAgICAgICAgICAgIHN1cnZleSA9ICJhY3M1IiwKICAgICAgICAgICAgICBvdXRwdXQgPSAid2lkZSIpCgpgYGAKXAoKUXVlc3Rpb24gMTMuCVRha2UgYSBnbGltcHNlIGF0IHlvdXIgZGF0YS4KYGBge3J9CmdsaW1wc2UoY2EpCmBgYAoKXAoKClF1ZXN0aW9uIDE0LglHaXZlIHVzIHRoZSBtZWFuIG1lZGlhbiBob3VzZWhvbGQgaW5jb21lIGFjcm9zcyB0cmFjdHMgaW4gQ2FsaWZvcm5pYS4KYGBge3J9CmNhICU+JQogIHN1bW1hcml6ZShNZWFuID0gbWVhbihtZWRpbmNFKSkKYGBgCgpcCgpRdWVzdGlvbiAxNS4JTWFrZSBhIHRhYmxlIHNob3dpbmcgdGhlIG1lYW4sIG1lZGlhbiwgYW5kIHN0YW5kYXJkIGRldmlhdGlvbiBvZiBtZWRpYW4gaG91c2Vob2xkIGluY29tZSBhY3Jvc3MgdHJhY3QgaW4gQ2FsaWZvcm5pYSwgYW5kIG1ha2UgaXQgbG9vayBmYW5jeSEKYGBge3J9CnN1bW1hcnl0YWJsZSA8LSBjYSAlPiUKICBzdW1tYXJpemUoTWVhbiA9IG1lYW4obWVkaW5jRSksCiAgICAgICAgICAgIE1lZGlhbiA9IG1lZGlhbihtZWRpbmNFKSwKICAgICAgICAgICAgU0QgPSBzZChtZWRpbmNFKQogICAgICAgICAgICApCm15X3RhYmxlIDwtIGZsZXh0YWJsZShzdW1tYXJ5dGFibGUpCm15X3RhYmxlCmBgYAoKXAoKUXVlc3Rpb24gMTYuCUNyZWF0ZSBhIGhpc3RvZ3JhbSBvZiBtZWRpYW4gaG91c2Vob2xkIGluY29tZSBhY3Jvc3MgdHJhY3RzIGluIENhbGlmb3JuaWEuIE1ha2Ugc3VyZSBpdCBoYXMgYSBsYWJlbC4KYGBge3J9CmNhICU+JQogIGdncGxvdCgpICsgCiAgZ2VvbV9oaXN0b2dyYW0obWFwcGluZyA9IGFlcyh4PW1lZGluY0UpKSArCiAgeGxhYigiTWVkaWFuIGhvdXNlaG9sZCBpbmNvbWUiKQpgYGAKClwKClF1ZXN0aW9uIDE3LglPSyBub3cgZ2l2ZSBtZSBhIGJveHBsb3QhIEFuZCBJIHdhbnQgYSBuaWNlIGxhYmVsIGZvciB0aGlzIHRvbyEKYGBge3IsIGV2YWw9RkFMU0V9CmNhICU+JQogIGdncGxvdCgpICsKICAgIGdlb21fYm94cGxvdChtYXBwaW5nID0gYWVzKHkgPSBtZWRpbmNFKSkgKwogICAgeWxhYigiTWVkaWFuIGhvdXNlaG9sZCBpbmNvbWUiKQpgYGAKCg==